# -*- coding: utf-8 -*-
import scrapy
from ipproxy.items import IpproxyItem


class Data5uSpider(scrapy.Spider):
    """Spider that scrapes free proxy listings from www.data5u.com."""

    name = 'data5u'
    allowed_domains = ['www.data5u.com']

    def __init__(self, page_count=5, *args, **kwargs):
        """Build start_urls for the first *page_count* listing pages.

        ``page_count`` arrives as a *string* when supplied on the scrapy
        command line (``-a page_count=6``), so coerce it to int before
        using it in range() — the original code raised TypeError there.
        """
        super(Data5uSpider, self).__init__(*args, **kwargs)
        self.start_urls = [
            "http://www.data5u.com/free/gngn/index{0}.shtml".format(i)
            for i in range(1, int(page_count) + 1)
        ]

    def parse(self, response):
        """Yield one IpproxyItem per proxy row found on a listing page."""
        all_trs = response.xpath('/html/body/div[5]/ul/li[2]/ul')
        for tr in all_trs[1:]:  # skip the first <ul>, which is the header row
            ipinfo = tr.xpath('span/li//text()').extract()
            if len(ipinfo) < 8:
                # Malformed/short row: skip it rather than let IndexError
                # abort parsing of the remaining rows on this page.
                self.logger.warning('Skipping short proxy row: %r', ipinfo)
                continue
            row = IpproxyItem()
            row['ip'] = ipinfo[0]
            row['port'] = ipinfo[1]
            row['anonymous'] = ipinfo[2]
            row['proxy_type'] = ipinfo[3]
            row['country'] = ' '.join(ipinfo[4:7])
            # The speed cell's position depends on how many text nodes the
            # row produced: exactly 8 fields -> index 7, otherwise index 8.
            speed_text = ipinfo[7] if len(ipinfo) == 8 else ipinfo[8]
            row['speed'] = float(speed_text.replace(' 秒', ''))
            row['checked_time'] = None  # checked-time text not parsed yet
            row['proxy_name'] = self.name
            yield row
